usa.gov data

This example is taken and adapted from the book "Python for Data Analysis" by Wes McKinney. First we load data from the URL shortening service bit.ly about the usa.gov domain.


In [23]:
# Render matplotlib figures inline, directly in the notebook output.
%matplotlib inline
# Relative path to the bit.ly usa.gov data file; it is resolved against the
# directory the notebook kernel is running in.
path = 'data/usagov_bitly_data2012-03-16-1331923249.txt'

In [5]:
# Peek at the first raw line of the data file. The context manager closes the
# file handle as soon as the line has been read instead of leaving it open.
with open(path) as data_file:
    first_line = data_file.readline()
first_line


Out[5]:
'{ "a": "Mozilla\\/5.0 (Windows NT 6.1; WOW64) AppleWebKit\\/535.11 (KHTML, like Gecko) Chrome\\/17.0.963.78 Safari\\/535.11", "c": "US", "nk": 1, "tz": "America\\/New_York", "gr": "MA", "g": "A6qOVH", "h": "wfLQtf", "l": "orofrog", "al": "en-US,en;q=0.8", "hh": "1.usa.gov", "r": "http:\\/\\/www.facebook.com\\/l\\/7AQEFzjSi\\/1.usa.gov\\/wfLQtf", "u": "http:\\/\\/www.ncbi.nlm.nih.gov\\/pubmed\\/22415991", "t": 1331923247, "hc": 1331822918, "cy": "Danvers", "ll": [ 42.576698, -70.954903 ] }\n'

In [6]:
import json

# Every line of the file is parsed as one standalone JSON document.
# Reading inside a `with` block guarantees the file handle is closed once all
# lines have been consumed (the bare open() in a comprehension never closed it).
with open(path) as data_file:
    records = [json.loads(line) for line in data_file]

In [7]:
# Inspect the first parsed record: a dict keyed by the short field names
# that appear in the raw JSON line.
records[0]


Out[7]:
{u'a': u'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11',
 u'al': u'en-US,en;q=0.8',
 u'c': u'US',
 u'cy': u'Danvers',
 u'g': u'A6qOVH',
 u'gr': u'MA',
 u'h': u'wfLQtf',
 u'hc': 1331822918,
 u'hh': u'1.usa.gov',
 u'l': u'orofrog',
 u'll': [42.576698, -70.954903],
 u'nk': 1,
 u'r': u'http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/wfLQtf',
 u't': 1331923247,
 u'tz': u'America/New_York',
 u'u': u'http://www.ncbi.nlm.nih.gov/pubmed/22415991'}

In [8]:
# Look up the time zone ('tz') field of the first record.
records[0]['tz']


Out[8]:
u'America/New_York'

In [9]:
# Collect the 'tz' value of every record that has one; records without a
# 'tz' key are skipped, while empty-string values are kept.
time_zones = [record['tz'] for record in records if 'tz' in record]

In [10]:
# Show the first ten collected time zone strings (some of them are empty).
time_zones[:10]


Out[10]:
[u'America/New_York',
 u'America/Denver',
 u'America/New_York',
 u'America/Sao_Paulo',
 u'America/New_York',
 u'America/New_York',
 u'Europe/Warsaw',
 u'',
 u'',
 u'']

In [11]:
def get_counts(sequence):
    """Count how many times each distinct item occurs in ``sequence``.

    Parameters
    ----------
    sequence : iterable
        Any iterable of hashable items (for example a list of time zone
        strings).

    Returns
    -------
    dict
        A plain ``dict`` mapping every distinct item to the number of times
        it was seen. Looking up an item that never occurred raises
        ``KeyError``.
    """
    # Imported locally so this cell does not depend on any other cell.
    from collections import Counter

    # iter() forces element-by-element counting even when a mapping is passed
    # (Counter would otherwise treat a mapping's values as ready-made counts).
    # dict() converts the Counter back to a plain dict so that missing keys
    # raise KeyError instead of silently returning 0.
    return dict(Counter(iter(sequence)))

In [13]:
# Tally every time zone string, then look up the count for one zone.
counts = get_counts(time_zones)
counts['America/New_York']


Out[13]:
1251

In [14]:
# Number of records that carry a 'tz' field (empty-string values included).
len(time_zones)


Out[14]:
3440

In [15]:
def top_counts(count_dict, n=10):
    """Return the ``n`` entries of ``count_dict`` with the highest counts.

    Parameters
    ----------
    count_dict : dict
        Mapping of key -> count, e.g. the result of ``get_counts``.
    n : int, optional
        Maximum number of entries to return (default 10).

    Returns
    -------
    list of (count, key) tuples
        Sorted in ascending order, so the most frequent key comes last.
        Entries with equal counts are ordered by comparing their keys.
        An empty list is returned when ``n`` is zero or negative.
    """
    # Guard against n <= 0: the slice [-0:] is the same as [0:] and would
    # return every entry instead of none.
    if n <= 0:
        return []
    value_key_pairs = sorted((count, key) for key, count in count_dict.items())
    return value_key_pairs[-n:]

In [16]:
# Ten most frequent time zones as (count, zone) pairs, most frequent last.
top_counts(counts)


Out[16]:
[(33, u'America/Sao_Paulo'),
 (35, u'Europe/Madrid'),
 (36, u'Pacific/Honolulu'),
 (37, u'Asia/Tokyo'),
 (74, u'Europe/London'),
 (191, u'America/Denver'),
 (382, u'America/Los_Angeles'),
 (400, u'America/Chicago'),
 (521, u''),
 (1251, u'America/New_York')]

Counting Time Zones with Pandas


In [24]:
from pandas import DataFrame, Series
import pandas as pd; import numpy as np

In [26]:
# Build a DataFrame with one row per parsed record; a field that a record
# does not contain shows up as NaN in that row.
frame = DataFrame(records)
# NOTE(review): displaying the bare frame prints a long, truncated table;
# frame.head() would keep the notebook output short.
frame


Out[26]:
_heartbeat_ a al c cy g gr h hc hh kw l ll nk r t tz u
0 NaN Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... en-US,en;q=0.8 US Danvers A6qOVH MA wfLQtf 1331822918 1.usa.gov NaN orofrog [42.576698, -70.954903] 1 http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/... 1331923247 America/New_York http://www.ncbi.nlm.nih.gov/pubmed/22415991
1 NaN GoogleMaps/RochesterNY NaN US Provo mwszkS UT mwszkS 1308262393 j.mp NaN bitly [40.218102, -111.613297] 0 http://www.AwareMap.com/ 1331923249 America/Denver http://www.monroecounty.gov/etc/911/rss.php
2 NaN Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ... en-US US Washington xxr3Qb DC xxr3Qb 1331919941 1.usa.gov NaN bitly [38.9007, -77.043098] 1 http://t.co/03elZC4Q 1331923250 America/New_York http://boxer.senate.gov/en/press/releases/0316...
3 NaN Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)... pt-br BR Braz zCaLwp 27 zUtuOu 1331923068 1.usa.gov NaN alelex88 [-23.549999, -46.616699] 0 direct 1331923249 America/Sao_Paulo http://apod.nasa.gov/apod/ap120312.html
4 NaN Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... en-US,en;q=0.8 US Shrewsbury 9b6kNl MA 9b6kNl 1273672411 bit.ly NaN bitly [42.286499, -71.714699] 0 http://www.shrewsbury-ma.gov/selco/ 1331923251 America/New_York http://www.shrewsbury-ma.gov/egov/gallery/1341...
5 NaN Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... en-US,en;q=0.8 US Shrewsbury axNK8c MA axNK8c 1273672506 bit.ly NaN bitly [42.286499, -71.714699] 0 http://www.shrewsbury-ma.gov/selco/ 1331923252 America/New_York http://www.shrewsbury-ma.gov/egov/gallery/1341...
6 NaN Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1... pl-PL,pl;q=0.8,en-US;q=0.6,en;q=0.4 PL Luban wcndER 77 zkpJBR 1331922854 1.usa.gov NaN bnjacobs [51.116699, 15.2833] 0 http://plus.url.google.com/url?sa=z&n=13319232... 1331923255 Europe/Warsaw http://www.nasa.gov/mission_pages/nustar/main/...
7 NaN Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/2... bg,en-us;q=0.7,en;q=0.3 None NaN wcndER NaN zkpJBR 1331922854 1.usa.gov NaN bnjacobs NaN 0 http://www.facebook.com/ 1331923255 http://www.nasa.gov/mission_pages/nustar/main/...
8 NaN Opera/9.80 (X11; Linux zbov; U; en) Presto/2.1... en-US, en None NaN wcndER NaN zkpJBR 1331922854 1.usa.gov NaN bnjacobs NaN 0 http://www.facebook.com/l.php?u=http%3A%2F%2F1... 1331923254 http://www.nasa.gov/mission_pages/nustar/main/...
9 NaN Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... pt-BR,pt;q=0.8,en-US;q=0.6,en;q=0.4 None NaN zCaLwp NaN zUtuOu 1331923068 1.usa.gov NaN alelex88 NaN 0 http://t.co/o1Pd0WeV 1331923255 http://apod.nasa.gov/apod/ap120312.html
10 NaN Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2)... en-us,en;q=0.5 US Seattle vNJS4H WA u0uD9q 1319563556 1.usa.gov NaN o_4us71ccioa [47.5951, -122.332603] 1 direct 1331923258 America/Los_Angeles https://www.nysdot.gov/rexdesign/design/commun...
11 NaN Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4... en-us,en;q=0.5 US Washington wG7OIH DC A0nRz4 1331815838 1.usa.gov NaN darrellissa [38.937599, -77.092796] 0 http://t.co/ND7SoPyo 1331923259 America/New_York http://oversight.house.gov/wp-content/uploads/...
12 NaN Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2)... en-us,en;q=0.5 US Alexandria vNJS4H VA u0uD9q 1319563556 1.usa.gov NaN o_4us71ccioa [38.790901, -77.094704] 1 direct 1331923259 America/New_York https://www.nysdot.gov/rexdesign/design/commun...
13 1331923261 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
14 NaN Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US... en-us,en;q=0.5 US Marietta 2rOUYc GA 2rOUYc 1255769846 1.usa.gov NaN bitly [33.953201, -84.5177] 1 direct 1331923262 America/New_York http://toxtown.nlm.nih.gov/index.php
15 NaN Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1... zh-TW,zh;q=0.8,en-US;q=0.6,en;q=0.4 HK Central District nQvgJp 00 rtrrth 1317318030 j.mp NaN walkeryuen [22.2833, 114.150002] 1 http://forum2.hkgolden.com/view.aspx?type=BW&m... 1331923263 Asia/Hong_Kong http://www.ssd.noaa.gov/PS/TROP/TCFP/data/curr...
16 NaN Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1... zh-TW,zh;q=0.8,en-US;q=0.6,en;q=0.4 HK Central District XdUNr 00 qWkgbq 1317318039 j.mp NaN walkeryuen [22.2833, 114.150002] 1 http://forum2.hkgolden.com/view.aspx?type=BW&m... 1331923263 Asia/Hong_Kong http://www.usno.navy.mil/NOOC/nmfc-ph/RSS/jtwc...
17 NaN Mozilla/5.0 (Macintosh; Intel Mac OS X 10.5; r... en-us,en;q=0.5 US Buckfield zH1BFf ME x3jOIv 1331839576 1.usa.gov NaN andyzieminski [44.299702, -70.369797] 0 http://t.co/6Cx4ROLs 1331923264 America/New_York http://www.usda.gov/wps/portal/usda/usdahome?c...
18 NaN GoogleMaps/RochesterNY NaN US Provo mwszkS UT mwszkS 1308262393 1.usa.gov NaN bitly [40.218102, -111.613297] 0 http://www.AwareMap.com/ 1331923262 America/Denver http://www.monroecounty.gov/etc/911/rss.php
19 NaN Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... it-IT,it;q=0.8,en-US;q=0.6,en;q=0.4 IT Venice wcndER 20 zkpJBR 1331922854 1.usa.gov NaN bnjacobs [45.438599, 12.3267] 0 http://www.facebook.com/ 1331923264 Europe/Rome http://www.nasa.gov/mission_pages/nustar/main/...
20 NaN Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ... es-ES ES Alcal zQ95Hi 51 ytZYWR 1331670549 bitly.com NaN jplnews [37.516701, -5.9833] 0 http://www.facebook.com/ 1331923265 Africa/Ceuta http://voyager.jpl.nasa.gov/imagesvideo/uranus...
21 NaN Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6... en-us,en;q=0.5 US Davidsonville wcndER MD zkpJBR 1331922854 1.usa.gov NaN bnjacobs [38.939201, -76.635002] 0 http://www.facebook.com/ 1331923267 America/New_York http://www.nasa.gov/mission_pages/nustar/main/...
22 NaN Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ... en-us US Hockessin y3ZImz DE y3ZImz 1331064158 1.usa.gov NaN bitly [39.785, -75.682297] 0 direct 1331923267 America/New_York http://portal.hud.gov/hudportal/documents/hudd...
23 NaN Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3)... en-us US Lititz wWiOiD PA wWiOiD 1330217829 1.usa.gov NaN bitly [40.174999, -76.3078] 0 http://www.facebook.com/l.php?u=http%3A%2F%2F1... 1331923267 America/New_York http://www.tricare.mil/mybenefit/ProfileFilter...
24 NaN Mozilla/5.0 (Windows; U; Windows NT 5.1; es-ES... es-es,es;q=0.8,en-us;q=0.5,en;q=0.3 ES Bilbao wcndER 59 zkpJBR 1331922854 1.usa.gov NaN bnjacobs [43.25, -2.9667] 0 http://www.facebook.com/ 1331923268 Europe/Madrid http://www.nasa.gov/mission_pages/nustar/main/...
25 NaN Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1... en-GB,en;q=0.8,en-US;q=0.6,en-AU;q=0.4 MY Kuala Lumpur wcndER 14 zkpJBR 1331922854 1.usa.gov NaN bnjacobs [3.1667, 101.699997] 0 http://www.facebook.com/ 1331923269 Asia/Kuala_Lumpur http://www.nasa.gov/mission_pages/nustar/main/...
26 NaN Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1... ro-RO,ro;q=0.8,en-US;q=0.6,en;q=0.4 CY Nicosia wcndER 04 zkpJBR 1331922854 1.usa.gov NaN bnjacobs [35.166698, 33.366699] 0 http://www.facebook.com/?ref=tn_tnmn 1331923268 Asia/Nicosia http://www.nasa.gov/mission_pages/nustar/main/...
27 NaN Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)... en-US,en;q=0.8 BR SPaulo zCaLwp 27 zUtuOu 1331923068 1.usa.gov NaN alelex88 [-23.5333, -46.616699] 0 direct 1331923269 America/Sao_Paulo http://apod.nasa.gov/apod/ap120312.html
28 NaN Mozilla/5.0 (iPad; CPU OS 5_0_1 like Mac OS X)... en-us None NaN vNJS4H NaN u0uD9q 1319563556 1.usa.gov NaN o_4us71ccioa NaN 0 direct 1331923270 https://www.nysdot.gov/rexdesign/design/commun...
29 NaN Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X... en-us None NaN FPX0IM NaN FPX0IL 1331922978 1.usa.gov NaN twittershare NaN 1 http://t.co/5xlp0B34 1331923270 http://www.ed.gov/news/media-advisories/us-dep...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3530 NaN Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1... en-US,en;q=0.8 US San Francisco xVZg4P CA wqUkTo 1331908247 go.nasa.gov NaN nasatwitter [37.7645, -122.429398] 0 http://www.facebook.com/l.php?u=http%3A%2F%2Fg... 1331926815 America/Los_Angeles http://www.nasa.gov/multimedia/imagegallery/im...
3531 NaN Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6... en-US None NaN wcndER NaN zkpJBR 1331922854 1.usa.gov NaN bnjacobs NaN 0 direct 1331926816 http://www.nasa.gov/mission_pages/nustar/main/...
3532 NaN Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2)... en-us,en;q=0.5 US Washington Au3aUS DC A9ct6C 1331926420 1.usa.gov NaN ncsha [38.904202, -77.031998] 1 http://www.ncsha.org/ 1331926817 America/New_York http://portal.hud.gov/hudportal/HUD?src=/press...
3533 NaN Mozilla/5.0 (iPad; CPU OS 5_1 like Mac OS X) A... en-us US Jacksonville b2UtUJ FL ieCdgH 1301393171 go.nasa.gov NaN nasatwitter [30.279301, -81.585098] 1 direct 1331926818 America/New_York http://apod.nasa.gov/apod/
3534 NaN Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)... en-us US Frisco vNJS4H TX u0uD9q 1319563556 1.usa.gov NaN o_4us71ccioa [33.149899, -96.855499] 1 direct 1331926820 America/Chicago https://www.nysdot.gov/rexdesign/design/commun...
3535 NaN Mozilla/5.0 (Windows NT 5.1; rv:10.0.2) Gecko/... en-us US Houston zIgLx8 TX yrPaLt 1331903484 aash.to NaN aashto [29.775499, -95.415199] 1 direct 1331926823 America/Chicago http://ntl.bts.gov/lib/44000/44300/44374/FHWA-...
3536 NaN Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; e... en-US,en;q=0.5 None NaN xIcyim NaN yG1TTf 1331728309 go.nasa.gov NaN nasatwitter NaN 0 http://t.co/g1VKE8zS 1331926824 http://www.nasa.gov/mission_pages/hurricanes/a...
3537 NaN Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2)... es-es,es;q=0.8,en-us;q=0.5,en;q=0.3 HN Tegucigalpa zCaLwp 08 w63FZW 1331546756 1.usa.gov NaN bufferapp [14.1, -87.216698] 0 http://t.co/A8TJyibE 1331926825 America/Tegucigalpa http://apod.nasa.gov/apod/ap120312.html
3538 NaN Mozilla/5.0 (iPhone; CPU iPhone OS 5_1 like Ma... en-us US Los Angeles qMac9k CA qds1Ge 1310473559 1.usa.gov NaN healthypeople [34.041599, -118.298798] 0 direct 1331926825 America/Los_Angeles http://healthypeople.gov/2020/connect/webinars...
3539 NaN Mozilla/5.0 (compatible; Fedora Core 3) FC3 KDE NaN US Bellevue zu2M5o WA zDhdro 1331586192 bit.ly NaN glimtwin [47.615398, -122.210297] 0 direct 1331926827 America/Los_Angeles http://www.federalreserve.gov/newsevents/press...
3540 NaN Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... en-US,en;q=0.8 US Payson wcndER UT zkpJBR 1331922854 1.usa.gov NaN bnjacobs [40.014198, -111.738899] 0 http://www.facebook.com/l.php?u=http%3A%2F%2F1... 1331926828 America/Denver http://www.nasa.gov/mission_pages/nustar/main/...
3541 NaN Mozilla/5.0 (X11; U; OpenVMS AlphaServer_ES40;... NaN US Bellevue zu2M5o WA zDhdro 1331586192 1.usa.gov NaN glimtwin [47.615398, -122.210297] 0 direct 1331926828 America/Los_Angeles http://www.federalreserve.gov/newsevents/press...
3542 NaN Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ... en-us US Pittsburg y3reI1 CA y3reI1 1331926120 1.usa.gov NaN bitly [38.0051, -121.838699] 0 http://www.facebook.com/l.php?u=http%3A%2F%2F1... 1331926829 America/Los_Angeles http://www.sba.gov/community/blogs/community-b...
3543 1331926831 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3544 NaN Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0.1) ... en-us,en;q=0.5 US Wentzville vNJS4H MO u0uD9q 1319563556 1.usa.gov NaN o_4us71ccioa [38.790001, -90.854897] 1 direct 1331926831 America/Chicago https://www.nysdot.gov/rexdesign/design/commun...
3545 NaN Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2)... en-us,en;q=0.5 US Saint Charles vNJS4H IL u0uD9q 1319563556 1.usa.gov NaN o_4us71ccioa [41.9352, -88.290901] 1 direct 1331926832 America/Chicago https://www.nysdot.gov/rexdesign/design/commun...
3546 NaN Mozilla/5.0 (iPhone; CPU iPhone OS 5_1 like Ma... en-us US Los Angeles qMac9k CA qds1Ge 1310473559 1.usa.gov NaN healthypeople [34.041599, -118.298798] 1 direct 1331926833 America/Los_Angeles http://healthypeople.gov/2020/connect/webinars...
3547 NaN Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)... en-us US Silver Spring y0jYkg MD y0jYkg 1331851811 1.usa.gov NaN bitly [39.052101, -77.014999] 1 direct 1331926836 America/New_York http://www.epa.gov/otaq/regs/fuels/additive/e1...
3548 NaN Mozilla/5.0 (iPhone; CPU iPhone OS 5_1 like Ma... en-us US Mcgehee y5rMac AR xANY6O 1331916302 1.usa.gov NaN twitterfeed [33.628399, -91.356903] 1 https://twitter.com/fdarecalls/status/18069759... 1331926836 America/Chicago http://www.fda.gov/Safety/Recalls/ucm296326.htm
3549 NaN Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... sv-SE,sv;q=0.8,en-US;q=0.6,en;q=0.4 SE Sollefte eH8wu 24 7dtjei 1260316355 1.usa.gov NaN tweetdeckapi [63.166698, 17.266701] 1 direct 1331926834 Europe/Stockholm http://www.nasa.gov/mission_pages/WISE/main/in...
3550 NaN Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ... en-us US Conshohocken A00b72 PA yGSwzn 1331917632 1.usa.gov NaN addthis [40.0798, -75.2855] 0 http://www.linkedin.com/home?trk=hb_tab_home_top 1331926837 America/New_York http://www.nlm.nih.gov/medlineplus/news/fullst...
3551 NaN Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... en-US,en;q=0.8 None NaN wcndER NaN zkpJBR 1331922854 1.usa.gov NaN bnjacobs NaN 0 http://plus.url.google.com/url?sa=z&n=13319268... 1331926837 http://www.nasa.gov/mission_pages/nustar/main/...
3552 NaN Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US... NaN US Decatur rqgJuE AL xcz8vt 1331227417 1.usa.gov NaN bootsnall [34.572701, -86.940598] 0 direct 1331926839 America/Chicago http://travel.state.gov/passport/passport_5535...
3553 NaN Mozilla/4.0 (compatible; MSIE 7.0; Windows NT ... en-us US Shrewsbury 9b6kNl MA 9b6kNl 1273672411 bit.ly NaN bitly [42.286499, -71.714699] 0 http://www.shrewsbury-ma.gov/selco/ 1331926840 America/New_York http://www.shrewsbury-ma.gov/egov/gallery/1341...
3554 NaN Mozilla/4.0 (compatible; MSIE 7.0; Windows NT ... en-us US Shrewsbury axNK8c MA axNK8c 1273672506 bit.ly NaN bitly [42.286499, -71.714699] 0 http://www.shrewsbury-ma.gov/selco/ 1331926840 America/New_York http://www.shrewsbury-ma.gov/egov/gallery/1341...
3555 NaN Mozilla/4.0 (compatible; MSIE 9.0; Windows NT ... en US Paramus e5SvKE NJ fqPSr9 1301298479 1.usa.gov NaN tweetdeckapi [40.9445, -74.07] 1 direct 1331926841 America/New_York http://www.fda.gov/AdvisoryCommittees/Committe...
3556 NaN Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1... en-US,en;q=0.8 US Oklahoma City jQLtP4 OK jQLtP4 1307530247 1.usa.gov NaN bitly [35.4715, -97.518997] 0 http://www.facebook.com/l.php?u=http%3A%2F%2F1... 1331926844 America/Chicago http://www.okc.gov/PublicNotificationSystem/Fo...
3557 NaN GoogleMaps/RochesterNY NaN US Provo mwszkS UT mwszkS 1308262393 j.mp NaN bitly [40.218102, -111.613297] 0 http://www.AwareMap.com/ 1331926846 America/Denver http://www.monroecounty.gov/etc/911/rss.php
3558 NaN GoogleProducer NaN US Mountain View zjtI4X CA zjtI4X 1327528527 1.usa.gov NaN bitly [37.419201, -122.057404] 0 direct 1331926847 America/Los_Angeles http://www.ahrq.gov/qual/qitoolkit/
3559 NaN Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ... en-US US Mc Lean qxKrTK VA qxKrTK 1312897670 1.usa.gov NaN bitly [38.935799, -77.162102] 0 http://t.co/OEEEvwjU 1331926849 America/New_York http://herndon-va.gov/Content/public_safety/Pu...

3560 rows × 18 columns


In [27]:
# First ten values of the 'tz' column; blank entries are empty strings.
frame['tz'][:10]


Out[27]:
0     America/New_York
1       America/Denver
2     America/New_York
3    America/Sao_Paulo
4     America/New_York
5     America/New_York
6        Europe/Warsaw
7                     
8                     
9                     
Name: tz, dtype: object

In [28]:
# Tally how often each time zone string occurs. value_counts() orders the
# result by descending count and, by default, leaves NaN entries out.
tz_counts = frame['tz'].value_counts()

In [29]:
# Ten most frequent time zones; the unlabeled row is the empty string.
tz_counts[:10]


Out[29]:
America/New_York       1251
                        521
America/Chicago         400
America/Los_Angeles     382
America/Denver          191
Europe/London            74
Asia/Tokyo               37
Pacific/Honolulu         36
Europe/Madrid            35
America/Sao_Paulo        33
dtype: int64

In [30]:
# Give rows with no 'tz' value at all (NaN) an explicit 'Missing' label.
# fillna() returns a new Series, so frame itself is not modified.
clean_tz = frame['tz'].fillna('Missing')

In [31]:
# Label rows whose 'tz' field is present but empty. clean_tz is the new
# Series returned by fillna(), so this masked assignment leaves frame untouched.
clean_tz[clean_tz == ''] = 'Unknown'

In [32]:
# Recount using the cleaned labels; this rebinds tz_counts, replacing the
# earlier tally that was computed from the raw 'tz' column.
tz_counts = clean_tz.value_counts()

In [33]:
# Ten most frequent labels after cleaning.
tz_counts[:10]


Out[33]:
America/New_York       1251
Unkown                  521
America/Chicago         400
America/Los_Angeles     382
America/Denver          191
Missing                 120
Europe/London            74
Asia/Tokyo               37
Pacific/Honolulu         36
Europe/Madrid            35
dtype: int64

In [34]:
# Horizontal bar chart of the ten most frequent time zone labels.
tz_counts.head(10).plot(kind='barh', rot=0)


Out[34]:
<matplotlib.axes._subplots.AxesSubplot at 0x11911d350>

In [35]:
# Sample value of column 'a' (row 1): the string that will be tokenised below.
frame['a'][1]


Out[35]:
u'GoogleMaps/RochesterNY'

In [36]:
# Another sample of column 'a' (row 50).
frame['a'][50]


Out[36]:
u'Mozilla/5.0 (Windows NT 5.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2'

In [37]:
# And one more sample of column 'a' (row 51).
frame['a'][51]


Out[37]:
u'Mozilla/5.0 (Linux; U; Android 2.2.2; en-us; LG-P925/V10e Build/FRG83G) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1'

In [38]:
# Take the first whitespace-delimited token of every non-null value in
# column 'a'; the resulting Series gets a fresh 0..n-1 index.
agent_strings = frame['a'].dropna()
results = Series([agent.split()[0] for agent in agent_strings])

In [39]:
# First five extracted tokens.
results[:5]


Out[39]:
0               Mozilla/5.0
1    GoogleMaps/RochesterNY
2               Mozilla/4.0
3               Mozilla/5.0
4               Mozilla/5.0
dtype: object

In [40]:
# Eight most common leading tokens, ordered by descending count.
results.value_counts()[:8]


Out[40]:
Mozilla/5.0                 2594
Mozilla/4.0                  601
GoogleMaps/RochesterNY       121
Opera/9.80                    34
TEST_INTERNET_AGENT           24
GoogleProducer                21
Mozilla/6.0                    5
BlackBerry8520/5.0.0.681       4
dtype: int64

In [41]:
# Keep only the rows whose column 'a' holds a value (drops the NaN rows).
has_agent = frame['a'].notnull()
cframe = frame[has_agent]

In [42]:
# Flag each remaining row by whether its column 'a' string mentions 'Windows',
# then turn the boolean flags into human-readable labels.
is_windows = cframe['a'].str.contains('Windows')
operating_system = np.where(is_windows, 'Windows', 'Not Windows')

In [43]:
# First five operating-system labels (a NumPy array of strings).
operating_system[:5]


Out[43]:
array(['Windows', 'Not Windows', 'Windows', 'Not Windows', 'Windows'], 
      dtype='|S11')

In [44]:
# Group rows by two keys: the 'tz' column and the operating_system array.
# operating_system was computed from cframe['a'], so it has one label per
# cframe row and lines up with the rows by position.
by_tz_os = cframe.groupby(['tz', operating_system])

In [45]:
# Count rows per (time zone, operating system) pair, pivot the operating
# system level into columns, and use 0 for combinations that never occur.
group_sizes = by_tz_os.size()
agg_counts = group_sizes.unstack().fillna(0)
agg_counts[:10]


Out[45]:
Not Windows Windows
tz
245 276
Africa/Cairo 0 3
Africa/Casablanca 0 1
Africa/Ceuta 0 2
Africa/Johannesburg 0 1
Africa/Lusaka 0 1
America/Anchorage 4 1
America/Argentina/Buenos_Aires 1 0
America/Argentina/Cordoba 0 1
America/Argentina/Mendoza 0 1

In [46]:
# Total count per time zone (summing across the operating-system columns),
# then the positions that would sort those totals in ascending order.
tz_totals = agg_counts.sum(axis=1)
indexer = tz_totals.argsort()

In [47]:
# First ten entries of the argsort result; the values are row positions.
indexer[:10]


Out[47]:
tz
                                  24
Africa/Cairo                      20
Africa/Casablanca                 21
Africa/Ceuta                      92
Africa/Johannesburg               87
Africa/Lusaka                     53
America/Anchorage                 54
America/Argentina/Buenos_Aires    57
America/Argentina/Cordoba         26
America/Argentina/Mendoza         55
dtype: int64

In [48]:
# Reorder the rows by ascending total using the positional indexer, then keep
# the last ten rows, i.e. the ten time zones with the largest totals.
count_subset = agg_counts.take(indexer)[-10:]

In [49]:
# Per-OS counts for the ten busiest time zones, largest total last.
count_subset


Out[49]:
Not Windows Windows
tz
America/Sao_Paulo 13 20
Europe/Madrid 16 19
Pacific/Honolulu 0 36
Asia/Tokyo 2 35
Europe/London 43 31
America/Denver 132 59
America/Los_Angeles 130 252
America/Chicago 115 285
245 276
America/New_York 339 912

In [50]:
# Stacked horizontal bars: one bar per time zone, split by operating system.
count_subset.plot(kind='barh',stacked=True)


Out[50]:
<matplotlib.axes._subplots.AxesSubplot at 0x119196910>

In [51]:
# Divide every row by its own total so the two columns of each time zone sum
# to 1, then plot the resulting shares as stacked horizontal bars.
subset_totals = count_subset.sum(axis=1)
normed_subset = count_subset.div(subset_totals, axis=0)
normed_subset.plot(kind='barh', stacked=True)


Out[51]:
<matplotlib.axes._subplots.AxesSubplot at 0x1195af6d0>

In [ ]: